# simu parameters
set.seed(7)
n=200
p=14
r=1
type="scale-free"
plot=TRUE
################
#----- DATA
# simulate graph and omega, then sigma0 and finally counts
data=data_from_scratch(type = type,p = p+r,n = n,signed = FALSE,prob = 5/p,v = 0)
## Generating data from the multivariate normal distribution with the scale-free graph structure....done.
omega=data$omega
hidden=which(diag(omega)%in%sort(diag(omega), decreasing = TRUE)[1:r])[1:r] # on cache les r plus gros
trueClique=which(omega[hidden,-hidden]!=0)
if(plot){
G=draw_network(1*(omega==1),groupes=1*(diag(omega)==diag(omega)[hidden][1]),
layout="nicely",curv=0,nb=2,pal="black",nodes_label =rep("",p+r))$G
print(G)
}
Kh <- omega[hidden,hidden]
Ko <- omega[-hidden,-hidden]
Koh <- omega[-hidden,hidden]
Km <- Ko - Koh %*%solve(Kh)%*% t(Koh)
sigmaO=solve(Km)
counts=generator_PLN(sigmaO,covariates = NULL,n=n)
ome_init=omega[c(2:15,1),c(2:15,1)] #ome is for testing
ome=ome_init
diag(ome)=0
#----- PLN on counts
# optimization of theta and h(Z_O)
PLNfit<-PLN(counts~1)
##
## Initialization...
## Adjusting a PLN model with full covariance model
## Post-treatments...
## DONE!
MO<-PLNfit$var_par$M # MiO = ith row of MO
SO<-PLNfit$var_par$S # SiO = diag(ith row of SO)
sigma_obs=PLNfit$model_par$Sigma
Study of the prefered quantity of noise in this example:
B=20 ; coeff=seq(0.1,4,0.5)
varyCoeff=lapply(coeff,function(c){
res=cbind( data.frame( t(sapply(1:B, function(x){
init.mclust(sigma_obs,title="Sigma",
trueClique = trueClique,n.noise=round(p*c), plot=FALSE)$FPN
}))),c)
})
varyCoeff=do.call(rbind,varyCoeff)
colnames(varyCoeff)=c("FN","FP","coeff")
varyCoeff %>% mutate(nul = (FN==1 & FP==0)) %>% filter(!nul) %>% dplyr::select(-nul) %>%
group_by(coeff) %>% summarise(sumFP=sum(FP), sumFN=sum(FN), sum=sum(FP+FN)) %>%
kable() %>%
kable_styling()
| coeff | sumFP | sumFN | sum |
|---|---|---|---|
| 0.1 | 2.857143 | 2.857143 | 5.714286 |
| 0.6 | 2.571429 | 3.857143 | 6.428571 |
| 1.1 | 2.142857 | 5.142857 | 7.285714 |
| 1.6 | 1.142857 | 5.142857 | 6.285714 |
| 2.1 | 1.714286 | 6.571429 | 8.285714 |
| 2.6 | 2.000000 | 6.285714 | 8.285714 |
| 3.1 | 2.142857 | 6.428571 | 8.571429 |
| 3.6 | 1.000000 | 7.142857 | 8.142857 |
initviasigma=init.mclust((cov2cor(sigma_obs)),title="Sigma",
trueClique = trueClique,n.noise=1)$init
initial.param<-initEM(sigma_obs,n=n,cliqueList = list(initviasigma),cst=1.05, pca=TRUE) # quick and dirty modif for initEM to take a covariance matrix as input
omega_init=initial.param$K0
sigma_init=initial.param$Sigma0
pr=prcomp(t(counts[,initviasigma]),scale. = FALSE)
MHinit = matrix(pr$rotation[,1]*pr$sdev[1],nrow=n,ncol=r)
O=1:p
Wg_init <- matrix(1, p+r, p+r); diag(Wg_init) = 0; Wg_init =Wg_init / sum(Wg_init)
W_init <- matrix(1, p+r, p+r); diag(W_init) = 0; W_init =W_init / sum(W_init)
W_init[O,O] <- EMtree_corZ(cov2cor(sigma_obs),n = n,maxIter = 20)$edges_weight
##
## Likelihoods: 70.3395 , 70.38846 , 70.38846 ,
## Convergence took 0.36 secs and 3 iterations.
## Likelihood difference = 1.724393e-08
## Betas difference = 2.524023e-16
resVEM<-VEMtree(counts,MO,SO,MH=MHinit,omega_init,W_init,Wg_init, eps=5e-3,
alpha=1,
maxIter=6, plot=TRUE,vraiOm=ome_init, condTrack=TRUE)
##
## Iter n°1
## , diffinvSigO=3.3799
## Iter n°2
## , diffinvSigO=1.82113
## Iter n°3
## , diffinvSigO=0.9517
## Iter n°4
## , diffinvSigO=0.25152
## Iter n°5
## , diffinvSigO=0.26242
## Iter n°6
## , diffinvSigO=0.13304
## VEMtree ran in 3.771secs and 6 iterations.
## Final weights difference: 0.0069
obs=data.frame(value=F_Sym2Vec(resVEM$Pg[1:p,1:p]), key="obs")
hid= data.frame(value=resVEM$Pg[1:p,p+1], key="hid")
rbind(obs, hid)%>%
ggplot(aes(value, fill=key, color=key))+geom_histogram()+theme_light()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plotVEM(resVEM$Pg,ome,r=1,seuil=0.5)
values=courbes_seuil(probs = resVEM$Pg,omega = ome,h = 15,seq_seuil = seq(0,1,0.02))
values %>% mutate(crit = PPV+TPR) %>% filter(crit==max(crit, na.rm=TRUE)) %>%
summarise(mins=min(seuil), maxs=max(seuil), PPV=max(PPV), PPVO=max(PPVO),PPVH=max(PPVH),
TPR=max(TPR),TPRO=max(TPRO),TPRH=max(TPRH)) %>%
kable() %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "float_right")
| mins | maxs | PPV | PPVO | PPVH | TPR | TPRO | TPRH |
|---|---|---|---|---|---|---|---|
| 0.16 | 0.64 | 0.87 | 0.88 | 0.86 | 0.93 | 1 | 0.86 |
Ce tableau repère pour quelles plage de seuil on a une valeure maximale pour la quantité PPV+TPR, et affiche les valeurs correspondantes pour PPVO, PPVH, TPRO et TPRH.
plotVerdict(values, seuil)+guides(color=FALSE)
resVEM<-VEMtree(counts,MO,SO,MH=MHinit,omega_init,W_init,Wg_init, eps=5e-3,
alpha=0.9,
maxIter=6, plot=TRUE,vraiOm=ome_init, condTrack=TRUE)
##
## Iter n°1
## , diffinvSigO=3.3799
## Iter n°2
## , diffinvSigO=1.80801
## Iter n°3
## , diffinvSigO=0.95938
## Iter n°4
## , diffinvSigO=0.25699
## Iter n°5
## , diffinvSigO=0.04308
## VEMtree ran in 2.455secs and 5 iterations.
## Final weights difference: 9e-04
obs=data.frame(value=F_Sym2Vec(resVEM$Pg[1:p,1:p]), key="obs")
hid= data.frame(value=resVEM$Pg[1:p,p+1], key="hid")
rbind(obs, hid)%>%
ggplot(aes(value, fill=key, color=key))+geom_histogram()+theme_light()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plotVEM(resVEM$Pg,ome,r=1,seuil=0.5)
values=courbes_seuil(probs = resVEM$Pg,omega = ome,h = 15,seq_seuil = seq(0,1,0.02))
values %>% mutate(crit = PPV+TPR) %>% filter(crit==max(crit, na.rm=TRUE)) %>%
summarise(mins=min(seuil), maxs=max(seuil), PPV=max(PPV), PPVO=max(PPVO),PPVH=max(PPVH),
TPR=max(TPR),TPRO=max(TPRO),TPRH=max(TPRH)) %>%
kable() %>% kable_styling()
| mins | maxs | PPV | PPVO | PPVH | TPR | TPRO | TPRH |
|---|---|---|---|---|---|---|---|
| 0.14 | 0.66 | 0.87 | 0.88 | 0.86 | 0.93 | 1 | 0.86 |
plotVerdict(values, seuil)+guides(color=FALSE)